# ------------------------------------------------------------------------------#
# Description: Create Table A.2: Google Street View Summary Statistics
# Project:     Peer Effects in Voluntary Environmental Policies:
#              An Application to Urban Water Quality
# Author:      Daniel A. Brent | dab320@psu.edu
# Created:     2025-06-11
# ------------------------------------------------------------------------------#

# Clear environment ------------------------------------------------------------
rm(list = ls())
gc()

# Load required packages -------------------------------------------------------
library(data.table)
library(modelsummary)

options(scipen = 999)
options("modelsummary_format_numeric_latex" = "plain")

# Load and merge base data -----------------------------------------------------
load("data/adoptions/adoptions.RData")
load("data/bucket/peer_adopt_buckets_any.RData")
dat <- merge(adopt, peer.adopt.bucket.any, by = c("pin", "year"))
rm(adopt, peer.adopt.bucket.any)

load("data/bucket/peer_adopt_buckets_rg.RData")
dat <- merge(dat, peer.adopt.bucket.rg, by = c("pin", "year"))
rm(peer.adopt.bucket.rg)

load("data/bucket/peer_adopt_buckets_any_rg.RData")
dat <- merge(dat, peer.adopt.bucket.any.rg, by = c("pin", "year"))
rm(peer.adopt.bucket.any.rg)

load("data/bucket/peer_adopt_buckets_cs.RData")
dat <- merge(dat, peer.adopt.bucket.cs, by = c("pin", "year"))
rm(peer.adopt.bucket.cs)

load("data/bucket/peer_adopt_buckets_both.RData")
dat <- merge(dat, peer.adopt.bucket.both, by = c("pin", "year"))
rm(peer.adopt.bucket.both)

load("data/bucket/peer_elig_buckets_cs.RData")
dat <- merge(dat, peer.buckets.cs, by = c("pin", "year"))
rm(peer.buckets.cs)

load("data/bucket/peer_elig_buckets_rg.RData")
dat <- merge(dat, peer.buckets.rg, by = c("pin", "year"))
rm(peer.buckets.rg)

load("data/bucket/total_peers.RData")
dat <- merge(dat, total.peers, by = "pin")
rm(total.peers)

load("data/parcel_build/parcel_build.RData")
dat <- merge(dat, parcel.build[, .(pin, zip5, sub.area, sqft, lot, beds, baths,
                                   yearbuilt, val.land, val.improve,
                                   ren.pre90, ren.90.99, ren.00.09, ren.10,
                                   water.prob)], by = "pin")
rm(parcel.build)

load("data/census/all_parcels_census_2010.RData")
parcels.2010[, blk := as.numeric(paste0(block, blkgrp))]
dat <- merge(dat, parcels.2010[, .(pin, tract, blkgrp, block, blk)])
rm(parcels.2010)

# Scale peer adoption variables ------------------------------------------------
dat[, peer.adopt.any.100.scale    := peer.adopt.any.100 / 100]
dat[, peer.adopt.any.rg.100.scale := peer.adopt.any.rg.100 / 100]
dat[, peer.adopt.rg.100.scale     := peer.adopt.rg.100 / 100]
dat[, peer.adopt.cs.100.scale     := peer.adopt.cs.100 / 100]
dat[, peer.adopt.both.100.scale   := peer.adopt.both.100 / 100]

dat <- dat[year > 2010]
dat[, elig.peers.avg.100 := elig.peers.cs.100 + elig.peers.rg.100]

# Load and merge GSV visibility and sign data ----------------------------------
load("data/gsv/rw_gsv.RData")

load("data/gsv/peer_adopt_buckets_any_vis.RData")
dat <- merge(dat, peer.adopt.bucket.any.vis, by = c("pin", "year"))
rm(peer.adopt.bucket.any.vis)

load("data/gsv/peer_adopt_buckets_any_rg_vis.RData")
dat <- merge(dat, peer.adopt.bucket.any.rg.vis, by = c("pin", "year"))
rm(peer.adopt.bucket.any.rg.vis)

load("data/gsv/peer_adopt_buckets_cs_vis.RData")
dat <- merge(dat, peer.adopt.bucket.cs.vis, by = c("pin", "year"))
rm(peer.adopt.bucket.cs.vis)

load("data/gsv/peer_adopt_buckets_any_sign.RData")
dat <- merge(dat, peer.adopt.bucket.any.sign, by = c("pin", "year"))
rm(peer.adopt.bucket.any.sign)

load("data/gsv/peer_adopt_buckets_any_rg_sign.RData")
dat <- merge(dat, peer.adopt.bucket.any.rg.sign, by = c("pin", "year"))
rm(peer.adopt.bucket.any.rg.sign)

load("data/gsv/peer_adopt_buckets_cs_sign.RData")
dat <- merge(dat, peer.adopt.bucket.cs.sign, by = c("pin", "year"))
rm(peer.adopt.bucket.cs.sign)

# Prep rw.gsv data -------------------------------------------------------------
rw.gsv <- rw.gsv[install.year < 2021]
rw.gsv[, sign.duration := sign.max.year - sign.min.year]

rw.gsv[, dif.sign.yr2.yr1 := sign.year2 - sign.year1]
rw.gsv[, dif.sign.yr3.yr2 := sign.year3 - sign.year2]
rw.gsv[, dif.sign.yr4.yr3 := sign.year4 - sign.year3]
rw.gsv[, dif.sign.yr5.yr4 := sign.year5 - sign.year4]
rw.gsv[, dif.sign.yr6.yr5 := sign.year6 - sign.year5]
rw.gsv[, dif.sign.yr7.yr6 := sign.year7 - sign.year6]

rw.gsv[, dif.sign.avg := rowMeans(.SD, na.rm = TRUE), .SDcols = 32:37]
rw.gsv[, dif.sign.avg := fifelse(is.nan(dif.sign.avg), NA_real_, dif.sign.avg)]

# Summary statistics -----------------------------------------------------------
sum.rw.any.gsv <- rw.gsv[adopt.any==1,list(visible.any)]
setnames(sum.rw.any.gsv, c('Any'))
datasummary(All(sum.rw.any.gsv) ~ Mean + SD + N, fmt = 3, data = sum.rw.any.gsv)
any <- datasummary(All(sum.rw.any.gsv) ~ Mean + SD + N, fmt=3, data = sum.rw.any.gsv, output = 'data.frame')

sum.rw.rg.gsv <- rw.gsv[adopt.rg==1,list(visible.rg)]
setnames(sum.rw.rg.gsv, c('Rain Garden'))
datasummary(All(sum.rw.rg.gsv) ~ Mean + SD + N, fmt = 3, data = sum.rw.rg.gsv)
rg <- datasummary(All(sum.rw.rg.gsv) ~ Mean + SD + N, fmt=3, data = sum.rw.rg.gsv, output = 'data.frame')

sum.rw.cs.gsv <- rw.gsv[adopt.cs==1,list(visible.cs)]
setnames(sum.rw.cs.gsv, c('Cistern'))
datasummary(All(sum.rw.cs.gsv) ~ Mean + SD + N, fmt = 3, data = sum.rw.cs.gsv)
cs <- datasummary(All(sum.rw.cs.gsv) ~ Mean + SD + N, fmt=3, data = sum.rw.cs.gsv, output = 'data.frame')

sum.rw.both.gsv <- rw.gsv[adopt.both==1,list(visible.any)]
setnames(sum.rw.both.gsv, c('Both'))
datasummary(All(sum.rw.both.gsv) ~ Mean + SD + N, fmt = 3, data = sum.rw.both.gsv)
both <- datasummary(All(sum.rw.both.gsv) ~ Mean + SD + N, fmt=3, data = sum.rw.both.gsv, output = 'data.frame')


sum.sign <- rw.gsv[,list(sign.duration,dif.sign.avg)]
setnames(sum.sign, c('Duration (Years)','Years Between Images'))
datasummary(All(sum.sign) ~ Mean + SD + N, fmt = 3, data = sum.sign)
sign <- datasummary(All(sum.sign) ~ Mean + SD + N, fmt=3, data = sum.sign, output = 'data.frame')

sum.sign.any <- rw.gsv[adopt.any==1,list(sign)]
setnames(sum.sign.any, c(' Any '))
datasummary(All(sum.sign.any) ~ Mean + SD + N, fmt = 3, data = sum.sign.any)
sign.any <- datasummary(All(sum.sign.any) ~ Mean + SD + N, fmt=3, data = sum.sign.any, output = 'data.frame')

sum.sign.rg <- rw.gsv[adopt.rg==1,list(sign)]
setnames(sum.sign.rg, c(' Rain Garden '))
datasummary(All(sum.sign.rg) ~ Mean + SD + N, fmt = 3, data = sum.sign.rg)
sign.rg <- datasummary(All(sum.sign.rg) ~ Mean + SD + N, fmt=3, data = sum.sign.rg, output = 'data.frame')

sum.sign.cs <- rw.gsv[adopt.cs==1,list(sign)]
setnames(sum.sign.cs, c(' Cistern '))
datasummary(All(sum.sign.cs) ~ Mean + SD + N, fmt = 3, data = sum.sign.cs)
sign.cs <- datasummary(All(sum.sign.cs) ~ Mean + SD + N, fmt=3, data = sum.sign.cs, output = 'data.frame')

sum.sign.both <- rw.gsv[adopt.both==1,list(sign)]
setnames(sum.sign.both, c(' Both '))
datasummary(All(sum.sign.both) ~ Mean + SD + N, fmt = 3, data = sum.sign.both)
sign.both <- datasummary(All(sum.sign.both) ~ Mean + SD + N, fmt=3, data = sum.sign.both, output = 'data.frame')

# Combine for full table preview and export ------------------------------------
all.vis.sign <- rbind(
  sum.rw.any.gsv,
  sum.rw.rg.gsv,
  sum.rw.cs.gsv,
  sum.rw.both.gsv,
  sum.sign.any,
  sum.sign.rg,
  sum.sign.cs,
  sum.sign.both,
  sum.sign,
  fill = TRUE
)

new_rows <- data.frame(
  a = c("\\bf Visibility", "\\bf Sign"),
  b = c("", ""), c = c("", ""), d = c("", "")
)
attr(new_rows, "position") <- c(1, 6)

# Preview table
datasummary(All(all.vis.sign) ~ Mean + SD + N, fmt = 3, data = all.vis.sign, sparse_header = TRUE, add_rows = new_rows)

# Save table
datasummary(
  All(all.vis.sign) ~ Mean + SD + N,
  fmt = 3,
  data = all.vis.sign,
  sparse_header = TRUE,
  add_rows = new_rows,
  output = "output/tables/table_a2.tex"
)
